import requests
import pymysql
from lxml import etree
from urllib import parse

# Date string in YYYY-MM-DD form. The live code below does not read it; the
# commented-out scraper further down compares it with the text of each list
# item's <span>.
shijian = "2022-08-31"

# Listing page that is fetched below.
url = 'http://www.dangjian.com/shouye/dangjianwenhua/dangshigushi/'

# Send a desktop-browser User-Agent instead of the library default.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/104.0.0.0 Safari/537.36",
}
# requests applies no timeout unless one is given, so a stalled server would
# otherwise block this script forever. A single number bounds both the
# connect wait and each read wait; requests.exceptions.Timeout is raised
# when it is exceeded.
response = requests.get(url, headers=headers, timeout=10)
response.encoding = "utf-8"  # force UTF-8 decoding so response.text is not garbled






# # content_type = response.headers['content-type']
# # print(content_type) # encoding of the currently fetched page
#
# html_data = etree.HTML(response.text)
# news_list = html_data.xpath('//div[@class="main"]/div[@class="inner"]/div[@class="main-left"]/ul[1]/li')
#
# for res in news_list:
#     if(res.xpath("span/text()")[0] == shijian):
#         wenzhang = parse.urljoin("http://www.dangjian.com/shouye/dangjianwenhua/dangshigushi/", res.xpath("div/a/@href")[0])
#
#         wenzhang_html = requests.get(wenzhang, headers=headers)
#         wenzhang_html.encoding = "utf-8"  # avoid garbled text in the fetched content
#
#         wenzhang_data = etree.HTML(wenzhang_html.text)
#         biaoti = wenzhang_data.xpath('//div[@id="title_tex"]/text()')
#         print(biaoti[0])
#
#         fabushijian = wenzhang_data.xpath('//span[@id="time_tex"]/text()')
#         print(fabushijian[0])
#
#         content = wenzhang_data.xpath('//div[@id="tex"]')
#         print(etree.tostring(content[0]))
















